# Author: Mamoor A. Khan
# Purpose: Create a balance table
# Dated:  Mar 10, 2025


# Session setup: wipe the global environment and print numbers in fixed
# (non-scientific) notation.
rm(list = ls())
options(scipen = 999)
# pacman::p_unload(all)

# pacman is only installed when its namespace cannot be found; it then
# loads every package this script relies on.
if (!requireNamespace("pacman", quietly = TRUE)) {
   install.packages("pacman")
}

pacman::p_load(
   tidyverse, here, xtable, estimatr,
   car, sandwich, kableExtra
)


# Delete every file found (recursively) under the 3_output folder.
# Only files are listed, so the sub-folders themselves are left in place.
all_files <- list.files(
   path = "../3_output",
   full.names = TRUE,
   recursive = TRUE
)
file.remove(all_files)



# Baseline household data, plus one 0/1 indicator per first-stage treatment
# arm (the indicator is NA wherever w1_s1_treat is NA).
mdat <- read_csv("../1_data/1_hh_data/baseline_data.csv")
mdat <- mutate(
   mdat,
   H1_control = as.numeric(w1_s1_treat == "H1_control"),
   H3_call_qs = as.numeric(w1_s1_treat == "H3_call_qs"),
   H2_call = as.numeric(w1_s1_treat == "H2_call")
)

# Sub-sample: households in the "H2_call" arm.
mdat1 <- filter(mdat, w1_s1_treat == "H2_call")

# Sub-sample: households in the "H3_call_qs" arm.
mdat2 <- filter(mdat, w1_s1_treat == "H3_call_qs")



#' Arm-level coefficients, robust SEs and an equality test for each outcome
#'
#' For every outcome the model `outcome ~ -1 + <treat_var> + factor(ps_name)`
#' is estimated with HC1 standard errors. The coefficient rows selected by
#' `rows` are kept and stacked in long form: one row per coefficient and one
#' extra row per standard error, the latter tagged by appending "_SE" to the
#' term name.
#'
#' @param regdat Data frame holding the outcomes, `treat_var` and `ps_name`.
#' @param treat_var Name (string) of the treatment-arm column.
#' @param rows Positions of the coefficient rows to keep from the tidied
#'   model output.
#' @param hyp_terms Optional character vector of linear restrictions passed
#'   to `car::linearHypothesis()`. When `NULL` no test is run and the p-value
#'   is `NA`.
#' @param outcome_vars Outcome column names. Defaults to the script-level
#'   `outcomes` vector.
#' @param outcome_labels Display labels, parallel to `outcome_vars`. Defaults
#'   to the script-level `outcomeslab` vector.
#' @return A tibble with columns `outcome` (factor, labelled), `term`,
#'   `estimate` (rounded to 3 decimals) and `pvalue` (rounded to 3 decimals,
#'   repeated on every row of the outcome).
create_regressions_df <- function(regdat, treat_var, rows = 1:3, hyp_terms = NULL,
                                  outcome_vars = outcomes,
                                  outcome_labels = outcomeslab) {
   stopifnot(length(outcome_vars) == length(outcome_labels))

   # Fit one outcome: robust coefficient table plus (optionally) the p-value
   # of the joint test described by `hyp_terms`.
   fit_outcome <- function(y) {
      # Build the formula once so both fits are guaranteed to use the same
      # specification.
      spec <- as.formula(paste(y, "~ -1 +", treat_var, "+ factor(ps_name)"))
      coefs <- tidy(lm_robust(spec, se_type = "HC1", data = regdat))[rows, ]

      pval <- NA
      if (!is.null(hyp_terms)) {
         # linearHypothesis() is run on a plain lm fit, with the HC1
         # covariance supplied explicitly through its `vcov.` argument.
         ols <- lm(spec, data = regdat)
         wald <- linearHypothesis(ols, hyp_terms, vcov. = vcovHC(ols, type = "HC1"))
         # Select the p-value column by its full name pattern instead of
         # relying on partial `$` matching; row 2 is the restricted-vs-full
         # comparison.
         p_col <- grep("^Pr\\(>", names(wald), value = TRUE)[1]
         pval <- round(wald[[p_col]][2], 3)
      }
      list(model = coefs, pval = pval)
   }

   models <- lapply(outcome_vars, fit_outcome)
   names(models) <- outcome_vars

   # Long form for one outcome: estimates first, then the "_SE" rows.
   stack_outcome <- function(x) {
      bind_rows(
         tibble(term = x$model$term, estimate = x$model$estimate),
         tibble(term = paste0(x$model$term, "_SE"), estimate = x$model$std.error)
      ) %>%
         mutate(pvalue = x$pval)
   }

   map_dfr(models, stack_outcome, .id = "outcome") %>%
      arrange(outcome, term) %>%
      mutate(
         estimate = round(estimate, 3),
         outcome = factor(outcome, levels = outcome_vars, labels = outcome_labels)
      )
}


#' Reshape long regression results into a printable balance-table panel
#'
#' Takes the long output of `create_regressions_df()` (columns `outcome`,
#' `term`, `estimate`, `pvalue`) and returns an all-character tibble with two
#' rows per outcome: the estimates, then the standard errors in parentheses.
#' The outcome label and the p-value appear on the first (estimate) row only;
#' every other cell that would be NA is an empty string.
#'
#' @param x Long results tibble; standard-error rows are identified by a
#'   trailing "_SE" on `term`.
#' @return An ungrouped tibble with columns `outcome`, one column per term,
#'   and `pvalue`.
fun.shape.data <- function(x) {
   # One p-value per outcome, taken from the first row of each outcome.
   first_in_x <- !duplicated(x$outcome)
   outcome_keys <- as.character(x$outcome[first_in_x])
   outcome_pvals <- x$pvalue[first_in_x]

   wide <- x %>%
      mutate(
         # Anchor on the suffix so a term that merely contains "_SE"
         # elsewhere in its name is neither mis-tagged nor mangled.
         type = ifelse(grepl("_SE$", term), "SE", "Est"),
         term = sub("_SE$", "", term),
         estimate = ifelse(type == "SE", paste0("(", estimate, ")"), as.character(estimate))
      ) %>%
      select(-pvalue) %>%
      spread(term, estimate, fill = "") %>%
      # "Est" sorts before "SE", so the estimate row leads each outcome.
      arrange(outcome, type)

   # Work out the per-row label and p-value before blanking anything, so the
   # result does not depend on the evaluation order inside a grouped mutate.
   is_first_row <- !duplicated(wide$outcome)
   outcome_chr <- as.character(wide$outcome)
   row_pvals <- outcome_pvals[match(outcome_chr, outcome_keys)]

   shaped <- select(wide, -type)
   shaped$outcome <- ifelse(is_first_row, outcome_chr, "")
   shaped$pvalue <- ifelse(is_first_row, as.character(row_pvals), "")

   # Any remaining NA (e.g. a p-value that was never computed) prints blank.
   shaped %>%
      mutate_all(~ replace(., is.na(.), ""))
}

# Balance-table outcomes and their display labels. Each column name is written
# next to its label so the two vectors cannot drift out of step; the pairs are
# then split into the two plain character vectors used by the rest of the
# script.
outcomeslab <- c(
   mpa_therm = "1) MPA Thermometer",
   mpa_party_therm = "2) MPA Party Thermometer",
   educ_years = "3) Education Years",
   income_scale = "4) Income Scale",
   copartisan = "5) Copartisan",
   legis_import = "6) Legislation Importance",
   dev_import = "7) Development Importance",
   efficacy = "8) Efficacy"
)
outcomes <- names(outcomeslab)
outcomeslab <- unname(outcomeslab)



# BALANCE TABLES
#
# Each panel is built the same way: estimate the arm coefficients and the
# equality test, reshape to two rows per outcome, rename the columns, and
# put a "Household Characteristics" title row on top.
# fun.shape.data() already returns "" in place of every NA, so no further
# NA replacement is needed here.
#
# NOTE(review): the arm labels below are assigned by POSITION via setNames().
# The arm columns presumably arrive in the alphabetical order of the
# coefficient names; please confirm each label lines up with the intended arm.


# BALANCE TABLE (1): full sample, by w1_s1_treat

results1 <- create_regressions_df(
   regdat = mdat,
   treat_var = "w1_s1_treat",
   rows = 1:3,
   hyp_terms = c(
      "w1_s1_treatH1_control - w1_s1_treatH2_call = 0",
      "w1_s1_treatH2_call - w1_s1_treatH3_call_qs = 0"
   )
)

balance_table1 <- results1 %>%
   fun.shape.data() %>%
   setNames(c(" ", "H0", "H1", "H2", "P-value")) %>% # Rename columns
   add_row(" " = "Household Characteristics", H0 = "", H1 = "", H2 = "", `P-value` = "", .before = 1)



# BALANCE TABLE (2): "H2_call" sub-sample, by w1_s2_treat

results2 <- create_regressions_df(
   regdat = mdat1,
   treat_var = "w1_s2_treat",
   rows = 1:2,
   hyp_terms = c("w1_s2_treatHC_control - w1_s2_treatHG_generic = 0")
)


balance_table2 <- results2 %>%
   fun.shape.data() %>%
   setNames(c(" ", "H1C", "H1Q", "P-value")) %>% # Rename columns
   add_row(" " = "Household Characteristics", H1C = "", H1Q = "", `P-value` = "", .before = 1)


# BALANCE TABLE (3): "H3_call_qs" sub-sample, by w1_s2_tree
#
# NOTE(review): if the columns are ordered alphabetically by the coefficient
# names used in hyp_terms, then H2C = generic_and_question, H2G = question
# and H2R = responsive_and_question. Confirm this is the intended mapping.

results3 <- create_regressions_df(
   regdat = mdat2,
   treat_var = "w1_s2_tree",
   rows = 1:3,
   hyp_terms = c(
      "w1_s2_treegeneric_and_question - w1_s2_treequestion = 0",
      "w1_s2_treequestion - w1_s2_treeresponsive_and_question = 0"
   )
)


balance_table3 <- results3 %>%
   fun.shape.data() %>%
   setNames(c(" ", "H2C", "H2G", "H2R", "P-value")) %>% # Rename columns
   add_row(" " = "Household Characteristics", H2C = "", H2G = "", H2R = "", `P-value` = "", .before = 1)



# Combine the three panels side by side. Panels 2 and 3 repeat the row-label
# column of panel 1, so it is dropped from each of them before binding
# (rather than removed afterwards by absolute column position).
balance_table <- bind_cols(
   balance_table1,
   select(balance_table2, -1),
   select(balance_table3, -1)
)




# Render the combined balance table as a booktabs LaTeX table (no shading)
# and write it to disk.
#
# The table has 12 columns: the row label, then 4 + 3 + 4 columns for the
# three tiers, matching the widths given to add_header_above().


table_output <- kable(balance_table,
   format = "latex", # LaTeX for journal typesetting
   digits = 3,
   booktabs = TRUE, # Professional booktabs style
   col.names = c(
      "", "H0", "H1", "H2", "P-value", "H1C", "H1Q", "P-value",
      "H2C", "H2G", "H2R", "P-value"
   ),
   # Exactly one alignment per column: left for the label, right for the
   # rest. Derived from the data so the column spec cannot end up with more
   # (or fewer) entries than the table has columns.
   align = c("l", rep("r", ncol(balance_table) - 1))
) %>%
   add_header_above(
      c(
         " " = 1,
         "Tier 1 - Full Sample" = 4,
         "Tier 2 - by H1" = 3,
         "Tier 3 - by H2" = 4
      ),
      bold = TRUE, italic = FALSE
   ) %>%
   kable_styling(
      latex_options = c("hold_position", "scale_down"),
      font_size = 10
   ) %>% # Smaller font for compactness
   column_spec(1, width = "5cm") %>% # Wider first column for readability
   column_spec(2:12, width = "1.2cm") %>% # Tight, uniform numeric columns
   row_spec(0, hline_after = TRUE) %>% # Line under header
   row_spec(1, hline_after = TRUE) # Line after title row

# Make sure the destination folder exists before writing; cat() cannot
# create missing directories.
output_path <- "../3_output/2_tables/tab-D1.tex"
dir.create(dirname(output_path), recursive = TRUE, showWarnings = FALSE)
cat(table_output, file = output_path)


## IT IS IMPORTANT TO RESTART THE R SESSION AFTER THIS SCRIPT, AS SOME
## LIBRARIES CLASH WHEN RUNNING THE OTHER ANALYSIS SCRIPTS.
# The restart command only exists inside RStudio, so it is skipped (instead
# of raising an error) when the script runs elsewhere, e.g. via Rscript, or
# when rstudioapi is not installed.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
   rstudioapi::executeCommand("restartR")
}



######
# END OF SCRIPT
#####
